library(pheatmap)
library(data.table)
library(RColorBrewer)
library(openxlsx)
# Load crosscheck matrix
# CrossCheckOUTPUT is defined outside this section (presumably the path to the
# crosscheck matrix file -- confirm). The table is converted to a plain
# data.frame so the base-R indexing used further down applies.
data <- as.data.frame(fread(CrossCheckOUTPUT, stringsAsFactors = FALSE))
cat("Original data dimensions:", dim(data), "\n")
## Original data dimensions: 776 777
# Sanity check: number of distinct values in the READGROUP column
cat("Number of unique READGROUPS:", length(unique(data$READGROUP)), "\n")
## Number of unique READGROUPS: 776
# Data Preprocessing ----
# Remove self-comparisons: keep rows whose READGROUP contains "_" and drop
# columns whose name contains "_".
# NOTE(review): presumably "_" marks RNA read groups, leaving RNA rows against
# DNA columns -- confirm against the sample naming scheme.
data <- data[data$READGROUP %like% "_", ]
# Logical negation instead of -which(): when no column name matches, -which()
# yields integer(0) and would select zero columns rather than all of them.
data <- data[, !(names(data) %like% "_"), drop = FALSE]
# Load crosscheck metadata: one row per LEFT_GROUP_VALUE that has no "_",
# together with its LEFT_SAMPLE
crosscheck2 <- as.data.frame(fread(CrossCheckOUTPUT2))
crosscheck2 <- crosscheck2[, c("LEFT_GROUP_VALUE", "LEFT_SAMPLE")]
crosscheck2 <- crosscheck2[
  !duplicated(crosscheck2$LEFT_GROUP_VALUE) &
    !(crosscheck2$LEFT_GROUP_VALUE %like% "_"),
]
# Load sample metadata: "Latent TB" samples only, keeping the genotype ID and
# RNA sequencing ID columns
SAMPLE.EQTL <- read.xlsx(SAMPLE_SHEET, sheet = 1)
SAMPLE.EQTL <- SAMPLE.EQTL[SAMPLE.EQTL$TB_Status %in% "Latent TB", ]
SAMPLE.EQTL <- SAMPLE.EQTL[, c("genotype.ID_corrected", "RNA.Sequencing_ID")]
# Merge and reorganize data: attach the RNA sequencing ID to every group whose
# LEFT_SAMPLE matches a genotype ID, then sort by RNA sequencing ID
crosscheck3 <- merge(
  crosscheck2, SAMPLE.EQTL,
  by.x = "LEFT_SAMPLE", by.y = "genotype.ID_corrected"
)
crosscheck3 <- crosscheck3[order(crosscheck3$RNA.Sequencing_ID), ]
# Rows: read groups that are RNA sequencing IDs of the merged samples.
# Columns: READGROUP followed by the matching group columns, in crosscheck3
# order.
data2 <- data[
  data$READGROUP %in% crosscheck3$RNA.Sequencing_ID,
  c("READGROUP", crosscheck3$LEFT_GROUP_VALUE),
  drop = FALSE
]
# The columns were selected in crosscheck3 order, so the RNA sequencing IDs can
# be assigned positionally to every column after READGROUP. This avoids the
# 2:ncol() sequence, which runs backwards when only READGROUP is present.
names(data2)[-1] <- crosscheck3$RNA.Sequencing_ID
cat("Processed data dimensions:", dim(data2), "\n")
## Processed data dimensions: 213 214
# Prepare matrix for Visualisation ----
# Set row names from READGROUP, then drop that (first) column.
# drop = FALSE keeps a data.frame even when a single data column remains.
row.names(data2) <- data2$READGROUP
data2 <- data2[, -1, drop = FALSE]
# Convert every column to numeric, stripping literal commas first (gsub()
# already returns character, so no extra as.character() is needed)
data2[] <- lapply(
  data2,
  function(x) as.numeric(gsub(",", "", x, fixed = TRUE))
)
# Order rows and columns alphabetically by their labels
data2 <- data2[order(rownames(data2)), order(colnames(data2)), drop = FALSE]
data_matrix <- data.matrix(data2)
# Apply thresholds for better visualization: clamp values to [-100, 100];
# pmin()/pmax() keep the matrix dimensions and dimnames, and NA stays NA
data_matrix <- pmax(pmin(data_matrix, 100), -100)
cat("Final matrix dimensions:", dim(data_matrix), "\n")
## Final matrix dimensions: 213 213
# Visualize Results ----
# Heatmap Visualisation ----
# Create heatmap with improved formatting
# Clustering is disabled for rows and columns so the heatmap keeps the order
# already present in data_matrix. The palette is the 4-class "YlGn" Brewer
# scheme interpolated to 10 colours. The argument is spelled out as `color`
# rather than `col`, which relies on partial argument matching.
pheatmap(
  data_matrix,
  color = colorRampPalette(brewer.pal(4, "YlGn"))(10),
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  fontsize_row = 6,
  fontsize_col = 6,
  main = "DNA-RNA Crosscheck Matrix\n(TB Samples)",
  show_rownames = TRUE,
  show_colnames = TRUE
)
